#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib2, urllib
import format_time
import sys,os
import re
# URL template of the dsap-agent page used to browse an HDFS directory.
# The single %s placeholder (the "dir" query parameter) is filled in with
# the HDFS directory by getNameList.
reqUrl = (
    'http://dsap2.web.58dns.org/dsap-agent/'
    '?targetserver=db-cdh-15-229'
    '&targetport=50075'
    '&targetpage=browseDirectory'
    '&targetprefix=jsp'
    '&namenodeInfoPort=50070'
    '&dir=%s'
    '&nnaddr=10.9.14.196:9000'
)
# NOTE: the cluster address has changed.
# 10.9.15.115:50075
# 10.9.14.218:9000

# Regex for one entry of the directory listing page: an <a> tag whose
# opening tag ends with ":9000" plus a quote, followed by </B><br></td>.
# Group 1 is the link text with surrounding whitespace stripped; it may not
# contain an underscore.
pattern = r'''<a.*?:9000['"]>\s*([^_]*?)\s*</a>\s*</B>\s*<br>\s*</td>'''
# Compiled once at import time; "." also matches newlines and matching is
# case-insensitive.
p = re.compile(pattern, flags=re.IGNORECASE | re.DOTALL)
def getNameList(hdfsDir):
    """Return the entry names found on the browse page of hdfsDir.

    hdfsDir is substituted into the module-level reqUrl template, the
    resulting page is fetched with urllib2.urlopen, and the first capture
    group of every match of the module-level compiled regex p is collected.
    Each name is also printed as it is found.

    Errors raised by urllib2.urlopen or read() propagate to the caller.
    """
    urlpath = reqUrl % hdfsDir
    urlfile = urllib2.urlopen(urlpath)
    try:
        pageContent = urlfile.read()
    finally:
        # The response is not closed automatically; release the connection
        # even when read() fails.
        urlfile.close()
    nameList = []
    for m in p.finditer(pageContent):
        name = m.group(1)
        print(name)
        nameList.append(name)
    return nameList

# URL template of the streamFile endpoint used to download one file.
# The two %s placeholders are filled with (hdfsDir, name) by down().
srcUrl = (
    'http://10.9.15.19:50075/streamFile'
    '%s/%s'
    '?&nnaddr=10.9.14.196:9000'
)
def down(nameList,hdfsDir,localDir):
    """Download every file named in nameList from hdfsDir into localDir.

    For each name, the module-level srcUrl template is filled with
    (hdfsDir, name) and the result is fetched with urllib.urlretrieve into
    os.path.join(localDir, name).  Returns None.
    """
    for fileName in nameList:
        remoteUrl = srcUrl % (hdfsDir, fileName)
        localFile = os.path.join(localDir, fileName)
        urllib.urlretrieve(remoteUrl, localFile)

def main(hdfsPath,targetDir,date):
    """Download the files of one date directory from HDFS to local disk.

    The remote directory is hdfsPath/date and the local directory is
    targetDir/date; the local directory is created when it does not exist.
    The remote listing comes from getNameList and the files are fetched
    with down.
    """
    # Local import keeps this block self-contained.
    import posixpath

    localDir = os.path.join(targetDir, date)
    if not os.path.exists(localDir):
        os.makedirs(localDir)
    # The HDFS path ends up inside a URL, so it must always use "/" as the
    # separator regardless of the local operating system.
    hdfsDir = posixpath.join(hdfsPath, date)
    nameList = getNameList(hdfsDir)
    down(nameList, hdfsDir, localDir)


if __name__ == '__main__':
    # Command-line entry point.
    #   wget.py                               default path, yesterday only
    #   wget.py hdfsPath                      given path, yesterday only
    #   wget.py hdfsPath 20141109             given path, a single date
    #   wget.py hdfsPath 20141109 20141112    given path, inclusive range
    # Exits with status 1 on bad usage or when a download fails, 0 otherwise.
    hdfsPath = "/dsap/resultdata/ec_data/EC_Post_Detail_Job"
    startDate = format_time.get_yesterday()
    endDate = format_time.get_yesterday()

    argCount = len(sys.argv)
    if not 1 <= argCount <= 4:
        print("usage: wget.py hdfsPath [startDate] [endDate]")
        sys.exit(1)
    if argCount >= 2:
        hdfsPath = sys.argv[1]
    if argCount >= 3:
        startDate = endDate = sys.argv[2]
    if argCount == 4:
        endDate = sys.argv[3]

    # format_time is a project module; presumably format_date normalizes the
    # dates to a separator-free form so they compare correctly below --
    # TODO confirm against format_time.
    startDate = format_time.format_date(startDate, '')
    endDate = format_time.format_date(endDate, '')
    targetDir = '../results/'

    exitCode = 0
    try:
        while startDate <= endDate:
            print('%s start deal %s' % (format_time.get_now(), startDate))
            main(hdfsPath, targetDir, startDate)
            print('%s finished %s' % (format_time.get_now(), startDate))
            startDate = format_time.add_date(startDate, inSep='', outSep='')
    except Exception as e:
        # Report the failure and signal it through the exit status so that
        # callers (cron, shell scripts) can detect it.
        sys.stderr.write(str(e) + '\n')
        exitCode = 1
    sys.exit(exitCode)
    